knitr::opts_chunk$set(message = FALSE)

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

Data input and cleaning

# Read the bakery sales CSV and tidy it: snake_case column names, numeric
# unit prices, and the `article` column carried over as `product_name`.
bakery_df <-
  read_csv("./Data/Bakery_sales.csv") %>%
  janitor::clean_names() %>%
  mutate(
    # Prices are cleaned as text first: drop the euro sign, turn the first
    # comma into a dot, then convert the result to a number.
    unit_price = unit_price %>%
      str_replace("€", "") %>%
      str_replace(",", ".") %>%
      as.numeric(),
    product_name = article
  ) %>%
  select(-article) %>%
  # Drop rows whose product name is exactly ".".
  filter(product_name != ".")

# Print the cleaned table.
bakery_df
## # A tibble: 234,000 × 7
##       x1 date       time   ticket_number quantity unit_price product_name       
##    <dbl> <date>     <time>         <dbl>    <dbl>      <dbl> <chr>              
##  1     0 2021-01-02 08:38         150040        1       0.9  BAGUETTE           
##  2     1 2021-01-02 08:38         150040        3       1.2  PAIN AU CHOCOLAT   
##  3     4 2021-01-02 09:14         150041        2       1.2  PAIN AU CHOCOLAT   
##  4     5 2021-01-02 09:14         150041        1       1.15 PAIN               
##  5     8 2021-01-02 09:25         150042        5       1.2  TRADITIONAL BAGUET…
##  6    11 2021-01-02 09:25         150043        2       0.9  BAGUETTE           
##  7    12 2021-01-02 09:25         150043        3       1.1  CROISSANT          
##  8    15 2021-01-02 09:27         150044        1       1.05 BANETTE            
##  9    18 2021-01-02 09:32         150045        3       1.2  TRADITIONAL BAGUET…
## 10    19 2021-01-02 09:32         150045        6       1.1  CROISSANT          
## # … with 233,990 more rows

A pie chart showing the percentage of each product’s sale count (top 10)

# Pie chart of the ten products that appear on the most rows of `bakery_df`.
# Each slice is sized by the product's row count (`n_obs`).
plot_pie <-
  bakery_df %>%
  # One row per product with its row count, sorted from most to least frequent.
  count(product_name, name = "n_obs", sort = TRUE) %>%
  head(10) %>%
  plot_ly(
    labels = ~product_name,
    values = ~n_obs,
    type = "pie",
    colors = "viridis"
  ) %>%
  layout(
    title = "Top 10 favorite bakeries",
    # Hide grid lines, zero lines and tick labels on both axes.
    xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
    yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE)
  )

# Render the chart.
plot_pie

A line plot showing the trend in total sales revenue by month

# Line chart of total sales revenue per calendar month.
# Revenue for a row is quantity * unit_price; rows are summed within each
# (year, month) pair.
plot_line <-
  bakery_df %>%
  mutate(
    # Take year and month directly from the Date column with lubridate
    # instead of splitting its text representation.
    year = year(date),
    month = as.integer(month(date)),
    rev = quantity * unit_price
  ) %>%
  group_by(year, month) %>%
  # Keep the result grouped by year, which is what summarize() did by default.
  # NOTE(review): plotly appears to use this grouping to draw each year as its
  # own line segment -- confirm before ungrouping.
  summarize(month_rev = sum(rev), .groups = "drop_last") %>%
  plot_ly(
    x = ~month,
    y = ~month_rev,
    # State the trace type explicitly rather than letting plotly infer it.
    type = "scatter",
    mode = "lines+markers",
    alpha = 0.5
  )

# Render the chart.
plot_line